library('tidyverse')
library('tidyquant')
library('tseries')
library('gghighlight')
library('gganimate')
#load('api_key.rdata')
“A time series is stationary if a shift in time doesn’t cause a change in the shape of the distribution. Basic properties of the distribution like the mean, variance, and covariance are constant over time.”
Stationary vs Seasonal Time series - http://www.statisticshowto.com/stationarity/
Most statistical models that have been developed assume stationarity.
set.seed(100)
# monthly data, normally distributed
# 100 stoltzcoin average trade price per month
# Standard deviation of 3
dates = seq(from = lubridate::ymd('2011-07-01'),
to = lubridate::ymd('2018-06-30'),
by = 'months')
n = length(dates)
mu = 100
sd = 3
time_series = tibble(date = dates, stoltzcoin = rnorm(n = n, mean = mu, sd = sd),
type = 'stationary', trend = 'none')
ggplot(time_series, aes(x = date, y = stoltzcoin)) +
geom_line() + geom_smooth(method = lm,
formula = y ~ x, se = FALSE) +
geom_hline(aes(yintercept = mean(stoltzcoin)), color="red")
ts_fit = lm(stoltzcoin ~ date, data = time_series)
ts_fit
##
## Call:
## lm(formula = stoltzcoin ~ date, data = time_series)
##
## Coefficients:
## (Intercept) date
## 1.082e+02 -5.002e-04
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')
ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)
# adf.test only accepts univariate time series data, zoo object will be used
adf.test(zoo::zoo(time_series$stoltzcoin, time_series$date))
## Warning in adf.test(zoo::zoo(time_series$stoltzcoin, time_series$date)): p-
## value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: zoo::zoo(time_series$stoltzcoin, time_series$date)
## Dickey-Fuller = -4.1967, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
going_up_by = 2
trend = seq(from = 0,
to = (n-1) * going_up_by,
by = going_up_by)
ts_lin_trend = tibble(date = dates, stoltzcoin = time_series$stoltzcoin + trend,
type = 'non-stationary', trend = 'linear')
ggplot(ts_lin_trend, aes(x = date, y = stoltzcoin)) +
geom_line() + geom_smooth(method = lm,
formula = y ~ x, se = FALSE) +
geom_hline(aes(yintercept = mean(stoltzcoin)), color="red")
ts_fit = lm(stoltzcoin ~ date, data = ts_lin_trend)
ts_fit
##
## Call:
## lm(formula = stoltzcoin ~ date, data = ts_lin_trend)
##
## Coefficients:
## (Intercept) date
## -887.75051 0.06521
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')
ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)
adf.test(zoo::zoo(ts_lin_trend$stoltzcoin, ts_lin_trend$date))
## Warning in adf.test(zoo::zoo(ts_lin_trend$stoltzcoin, ts_lin_trend$date)):
## p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: zoo::zoo(ts_lin_trend$stoltzcoin, ts_lin_trend$date)
## Dickey-Fuller = -4.1967, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
going_up_by = 2
trend = seq(from = 0,
to = (n-1) * going_up_by,
by = going_up_by)
seasonal = sin(1:n) * 10
ts_lin_trend_seasonal = tibble(date = dates, stoltzcoin = time_series$stoltzcoin + trend + seasonal,
type = 'non-stationary', trend = 'linear')
ggplot(ts_lin_trend_seasonal, aes(x = date, y = stoltzcoin)) +
geom_line() + geom_smooth(method = lm,
formula = y ~ x, se = FALSE) +
geom_hline(aes(yintercept = mean(stoltzcoin)), color="red")
ts_fit = lm(stoltzcoin ~ date, data = ts_lin_trend_seasonal)
ts_fit
##
## Call:
## lm(formula = stoltzcoin ~ date, data = ts_lin_trend_seasonal)
##
## Coefficients:
## (Intercept) date
## -887.83081 0.06523
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')
ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)
adf.test(zoo::zoo(ts_lin_trend_seasonal$stoltzcoin, ts_lin_trend_seasonal$date))
## Warning in adf.test(zoo::zoo(ts_lin_trend_seasonal$stoltzcoin,
## ts_lin_trend_seasonal$date)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: zoo::zoo(ts_lin_trend_seasonal$stoltzcoin, ts_lin_trend_seasonal$date)
## Dickey-Fuller = -6.4213, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
going_up_by = 2
trend = seq(from = 0,
to = (n-1) * going_up_by,
by = going_up_by)
seasonal = sin(1:n) * 1:n
ts_lin_trend_seasonal_exp = tibble(date = dates, stoltzcoin = time_series$stoltzcoin + trend + seasonal,
type = 'non-stationary', trend = 'linear')
ggplot(ts_lin_trend_seasonal_exp, aes(x = date, y = stoltzcoin)) +
geom_line() + geom_smooth(method = lm,
formula = y ~ x, se = FALSE) +
geom_hline(aes(yintercept = mean(stoltzcoin)), color="red")
ts_fit = lm(stoltzcoin ~ date, data = ts_lin_trend_seasonal_exp)
ts_fit
##
## Call:
## lm(formula = stoltzcoin ~ date, data = ts_lin_trend_seasonal_exp)
##
## Coefficients:
## (Intercept) date
## -925.35762 0.06756
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')
ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)
adf.test(zoo::zoo(ts_lin_trend_seasonal_exp$stoltzcoin, ts_lin_trend_seasonal_exp$date))
## Warning in adf.test(zoo::zoo(ts_lin_trend_seasonal_exp$stoltzcoin,
## ts_lin_trend_seasonal_exp$date)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: zoo::zoo(ts_lin_trend_seasonal_exp$stoltzcoin, ts_lin_trend_seasonal_exp$date)
## Dickey-Fuller = -6.2925, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
d1y = as.numeric(lubridate::year(min(ts_lin_trend_seasonal_exp$date)))
d1m = as.numeric(lubridate::month(min(ts_lin_trend_seasonal_exp$date)))
d2y = as.numeric(lubridate::year(max(ts_lin_trend_seasonal_exp$date)))
d2m = as.numeric(lubridate::month(max(ts_lin_trend_seasonal_exp$date)))
x = ts(ts_lin_trend_seasonal_exp$stoltzcoin, start=c(d1y, d1m), end=c(d2y, d2m), frequency=12)
plot(decompose(x, type = 'additive'))
plot(decompose(x, type = 'multiplicative'))
require(graphics)
m <- decompose(co2)
plot(m)
dow = tq_index("DOW") %>%
tq_get(get = "stock.prices")
head(dow, 10)
p = ggplot(dow, aes(x = date, y = close, col = symbol)) +
geom_line() +
gghighlight(max(close) > 250) +
theme_minimal() +
facet_wrap(~ symbol)
print(p)
ba = dow %>%
filter(symbol == 'BA') %>%
select(date, close)
plotly::ggplotly(ggplot(ba, aes(x = date, y = close)) + geom_line() + geom_smooth(method = lm, se = FALSE))
adf.test(zoo::zoo(ba$close, ba$date))
## Warning in adf.test(zoo::zoo(ba$close, ba$date)): p-value greater than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: zoo::zoo(ba$close, ba$date)
## Dickey-Fuller = 0.29712, Lag order = 13, p-value = 0.99
## alternative hypothesis: stationary
ts_fit = lm(close ~ date, data = ba)
ts_fit
##
## Call:
## lm(formula = close ~ date, data = ba)
##
## Coefficients:
## (Intercept) date
## -735.91762 0.05393
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')
ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)
ret = ba %>%
tq_transmute(close, mutate_fun = dailyReturn)
ggplot(ret, aes(x = date, y = daily.returns)) + geom_line() + geom_smooth(method = lm, se = FALSE)
adf.test(zoo::zoo(ret$daily.returns, ret$date))
## Warning in adf.test(zoo::zoo(ret$daily.returns, ret$date)): p-value smaller
## than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: zoo::zoo(ret$daily.returns, ret$date)
## Dickey-Fuller = -14.31, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
ts_fit = lm(daily.returns ~ date, data = ret)
ts_fit
##
## Call:
## lm(formula = daily.returns ~ date, data = ret)
##
## Coefficients:
## (Intercept) date
## -8.482e-03 5.807e-07
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')
ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)
ts_fit1 = lm(daily.returns ~ date, data = ret)
price = tibble(residuals = resid(ts_fit1))
ts_fit2 = lm(close ~ date, data = ba)
price.diff = tibble(residuals = resid(ts_fit2))
ba1 = bind_cols(daysFromStart = 1:nrow(res), price, price.diff)
ba2 = ba1 %>%
gather(price_type, price, -daysFromStart)
ba2
ggplot(ba2, aes(daysFromStart, price)) +
geom_point() +
# Here comes the gganimate code
transition_states(
price_type,
transition_length = 2,
state_length = 1
) +
view_follow()